********************************************************************************
* DESCRIPTION: Function to load user-written programs.
*
* INPUT:       - None.
*
* OUTPUT:      - User-written programs loaded in memory.
*
********************************************************************************
********************************************************************************
* MAIN CODE
********************************************************************************
*** prepare
* clear all programs from memory so that the definitions below can be created
* afresh (defining a program whose name is already in use raises an error)
program drop _all


*** define programs
* describe, summarize, and compress data
program prog_desc_sum_comp // input: none; output = none
	* Print an overview of the data in memory, then shrink its storage:
	*   1. list variables with their storage types and labels
	*   2. show summary statistics for every variable, without separator lines
	*   3. recast each variable to the smallest storage type that loses no information
	describe
	summarize, separator(0)
	compress
end

* describe, summarize, compress, and save data
program prog_desc_sum_comp_save // input: `1' = file name; output = data in memory saved to that file
	* Describe, summarize (no separator lines), and compress the data in memory,
	* then write it to the file named by the first argument, overwriting any
	* existing file of that name.
	args out_file
	describe
	summarize, separator(0)
	compress
	save "`out_file'", replace
end

* insert comma separator for thousands in strings
program prog_comma_thousands // input: `1' = name of string variable, `2' = observation number; output = comma separated thousands in string values
	* Rewrite the value of a string variable in one observation so that it carries
	* comma separators for thousands (for example, 1234567 becomes 1,234,567).
	*
	* Arguments:
	*   first  = name of the string variable to modify in place
	*   second = observation number whose value is rewritten
	*
	* The value is left untouched when it
	*   - cannot be read as a number, or the observation does not exist;
	*   - is below 100 (nothing is rewritten for one- and two-digit values);
	*   - is not an integer (a digit-grouping rewrite would corrupt the decimals);
	*   - is 2^53 or larger (a double can no longer hold every digit exactly).
	args strvar obs

	* temporary names cannot clash with existing variables, do not run into the
	* 32-character name limit, and are removed automatically even after an error
	tempvar num
	tempname val

	destring `strvar', force generate(`num')
	sum `num' if _n == `obs', meanonly
	scalar `val' = r(mean) // missing if the value is non-numeric or the observation is out of range

	if `val' < . & `val' >= 10^2 & `val' < 2^53 & `val' == floor(`val') {
		* the built-in comma format groups any number of digits, replacing a
		* hand-written case distinction by number of digits
		replace `strvar' = trim(string(`val', "%24.0fc")) if _n == `obs'
	}
end

* compute and display number of unique values of a variable
program prog_unique_vals // input: `1' = name of variable for which to compute unique values
	* Count the distinct nonmissing values of the variable named by the first
	* argument, store the count in the global macro <varname>_N, and display it.
	*
	* The -egen- command is prefixed with the contents of global macro gtools
	* (set outside this program); if that macro is empty, official -egen- is used.
	*
	* The group identifier is held in a temporary variable so that the program
	* does not fail when a variable called <varname>_group already exists or when
	* that name would be too long, and so that no helper variable is left behind
	* if the program stops with an error.
	tempvar grp
	${gtools}egen double `grp' = group(`1')
	qui sum `grp', meanonly
	global `1'_N = r(max) // groups are numbered 1, 2, ..., so the maximum equals the number of distinct values
	disp "--> number of unique values of variable `1' = ${`1'_N}"
end

* coarsen variables to be included as indicators in AKM regression
program prog_coarsen // input: `1' = name of variable to be coarsened; `2' = minimum number of observations in each (coarsened) category
	* Merge small categories of a categorical variable into one pooled category
	* until every category holds at least the requested number of observations.
	*
	* Arguments:
	*   first  = name of the categorical variable (modified in place)
	*   second = minimum number of observations per category
	*
	* Globals read:    akm_by_categ (0 = pool over all data; otherwise the loop runs
	*                  over values 1, ..., akm_by_categ + 1 of the grouping variable),
	*                  categ_var (name of the grouping variable)
	* Globals written: coarsen_val (numeric code assigned to the pooled category)
	*
	* Side effects: the data are re-sorted by -bysort-; if the minimum of the
	* variable is <= 0, the variable is shifted so that its minimum becomes 1.
	disp "--> coarsening variable `1' up to minimum category size `2':"

	* shift first, so that the pooled-category code below is chosen relative to
	* the values the variable actually takes during coarsening
	sum `1', meanonly
	if r(min) <= 0 replace `1' = `1' - r(min) + 1 // ensure categorical variable starts from 1, 2, ...

	* pooled-category code = a number of the form 99...9 that is strictly larger
	* than the maximum of the variable; the second line covers maxima such as 9,
	* 10, 99, or 100, for which the first formula does not exceed the maximum and
	* could therefore coincide with a genuine category
	sum `1', meanonly
	global coarsen_val = 10^ceil(log10(r(max))) - 1
	if ${coarsen_val} <= r(max) global coarsen_val = 10^(ceil(log10(r(max))) + 1) - 1

	* temporary variable for category sizes: cannot clash with a user variable
	* and is removed automatically if the program stops with an error
	tempvar cell_N

	forval g = 0/$akm_by_categ {
		local g_plus_1 = `g' + 1
		if $akm_by_categ disp "... ${categ_var} = `g_plus_1'"
		if `g' == 0 & $akm_by_categ == 0 {
			local categ_bys = ""
			local categ_cond = ""
			local categ_cond_add = ""
		}
		else {
			local categ_bys = "${categ_var}"
			local categ_cond = "if ${categ_var} == `g_plus_1'"
			local categ_cond_add = "& ${categ_var} == `g_plus_1'"
		}
		local needs_coarsening = 1
		while `needs_coarsening' {
			* current size of each category (within the current group, if any)
			bys `1' `categ_bys': gen `cell_N' = _N `categ_cond'
			sum `cell_N' `categ_cond', meanonly
			local N_min = r(min)
			local needs_coarsening = (`N_min' < `2')
			if `needs_coarsening' { // if there exists some category that needs coarsening
				sum `cell_N' if `1' == ${coarsen_val} `categ_cond_add', meanonly
				local N_coarsened = r(mean) // missing if no pooled category exists yet
				if `N_coarsened' < `2' { // if the pooled category itself has too few observations, then merge it with the (next) smallest nonpooled category
					sum `cell_N' if `1' != ${coarsen_val} `categ_cond_add', meanonly
					if r(N) == 0 {
						* everything is already pooled and the pool is still too
						* small: nothing is left to merge, so stop instead of
						* looping forever
						disp "... fewer than `2' observations in total; minimum category size cannot be reached"
						local needs_coarsening = 0
					}
					else {
						local N_min = r(min)
						replace `1' = ${coarsen_val} if `cell_N' == `N_min' `categ_cond_add'
					}
				}
				else replace `1' = ${coarsen_val} if `cell_N' == `N_min' `categ_cond_add' // pooled category is absent or large enough: recode the smallest category to the pooled code
			}
			drop `cell_N'
		}
	}
end

* check if -gtools- package is applicable (i.e., if number of observations <= 2^31 - 1)
program prog_gtools_check // input: `1' = factor by which current number of observations will increase during next operation (e.g., -reshape-)
	* Set global macro gtools_check to g if the current number of observations
	* times the given factor is at most 2^31 - 1, and to the empty string otherwise.
	* The factor defaults to 1 when no argument is supplied.
	local factor = 1
	if "`1'" != "" local factor = `1'

	quietly count
	local n_obs = r(N)

	if `factor'*`n_obs' > 2^31 - 1 {
		global gtools_check = ""
	}
	else {
		global gtools_check = "g"
	}
end

* creates a directory name of the rank, as opposed to the number
program prog_rank_name
	* Translate the numeric code in global macro rank_xx (1, ..., 19) into the
	* corresponding name and store it in global macro rank_name_now.
	* If rank_xx matches none of the codes, rank_name_now is left unchanged.

	* names listed in code order: position in the list = code
	local rank_names ///
		age_emp ///
		fe ///
		inc ///
		net_poaching_index ///
		net_poaching_index_noties ///
		net_poaching_index_robust ///
		net_poaching_index_robust_noties ///
		pagerank ///
		pagerank_noties ///
		poaching_index ///
		poaching_index_noties ///
		poaching_index_robust ///
		poaching_index_robust_noties ///
		retention_index ///
		retention_index_noties ///
		retention_index_robust ///
		retention_index_robust_noties ///
		size ///
		tenure

	local code = 0
	foreach rank_name of local rank_names {
		local ++code
		if ${rank_xx} == `code' global rank_name_now = "`rank_name'"
	}
end


********************************************************************************
* RETURNING TO MAIN CODE
********************************************************************************
